** Matching gas stations from the price data to the Autobahn rest-stop list
*
* Inputs : all_AB_stations.csv (Autobahn Stations folder): brand, latitude, longitude,
*          imported as v1-v3
*          gas_station.dta (GS folder): StID, Brand, Name, Lat, Lng are used
* Output : highway_stations.dta (processed-data folder): the StID of every price-data
*          station that was matched to an Autobahn rest stop, one row per StID
* Needs  : geodist (user-written command, not part of official Stata)
*
* Method : for each rest stop, take the 4 geographically nearest price-data stations and
*          accept the nearest one among the first three whose Brand equals the rest
*          stop's brand.

clear
import delimited "$P_Data/Autobahn Stations/all_AB_stations.csv"

rename v1 AB_station_brand
rename v2 AB_lat
rename v3 AB_long
* AB_id is the row number in the raw file, assigned BEFORE any rows are dropped.
* The manual overrides further down (AB_id 67 and 68) rely on this numbering, so
* they must be re-checked whenever the csv changes.
generate AB_id = _n

replace AB_station_brand = lower(AB_station_brand)
* dropping highway rest stops without any gas stations
drop if inlist(AB_station_brand, "segafredo", "eenbw", "eeon", "einnogy", "eother", "etur")


* form every (rest stop, price-data station) pair
cross using  "$P_Data/GS/gas_station.dta"

keep AB_* StID Brand Name Lat Lng


* a station needs both coordinates to get a distance
drop if missing(Lat, Lng)

geodist AB_lat AB_long Lat Lng, generate(distance)

* stable: without it Stata orders tied distances randomly, which would make the
* ranking of candidates (and hence the result) differ between runs
sort AB_id distance, stable
* checking 4 closest stations
by AB_id: keep if _n<5

* NOTE(review): AB_station_brand was lowercased above but Brand is compared as stored;
* this presumably relies on Brand already being lowercase in gas_station.dta - confirm.

* --- round 1: does the nearest station carry the rest stop's brand? ---
* the flag is computed on the first row and then copied to every row of the group
by AB_id: generate matching_1st_brand = (AB_station_brand==Brand) if _n==1
by AB_id: replace matching_1st_brand = matching_1st_brand[1]

* matched: keep only the nearest station
by AB_id: drop if _n>1 & matching_1st_brand==1

* not matched: discard the nearest station so the 2nd nearest becomes row 1
by AB_id: drop if matching_1st_brand==0 & _n==1

* --- round 2: same test on the (former) 2nd nearest station ---
by AB_id: generate matching_2nd_brand = (AB_station_brand==Brand) if _n==1 & matching_1st_brand==0
by AB_id: replace matching_2nd_brand = matching_2nd_brand[1]
* rest stops already matched in round 1 count as matched here too
replace matching_2nd_brand=1 if matching_1st_brand==1

** there are issues with a couple of stations - it looks like station ownership changed in 2020
* these 2 shell highway stops used to be esso
* Forcing matching_2nd_brand to 1 accepts whichever candidate is currently first for
* these two rest stops WITHOUT a brand comparison.
* NOTE(review): if round 1 failed for them, that is the 2nd-nearest station - verify
* that it really is the intended esso station.
replace AB_station_brand = "esso" if AB_id==68
replace matching_2nd_brand = 1 if AB_id==68
replace AB_station_brand = "esso" if AB_id==67
replace matching_2nd_brand = 1 if AB_id==67

by AB_id: drop if _n>1 & matching_2nd_brand==1

by AB_id: drop if _n==1 & matching_2nd_brand==0


* --- round 3: same test on the (former) 3rd nearest station ---
by AB_id: generate matching_3rd_brand = (AB_station_brand==Brand) if _n==1 & matching_2nd_brand==0
by AB_id: replace matching_3rd_brand = matching_3rd_brand[1]
replace matching_3rd_brand=1 if matching_2nd_brand==1
replace matching_3rd_brand=1 if matching_1st_brand==1
by AB_id: drop if _n>1 & matching_3rd_brand==1
by AB_id: drop if _n==1 & matching_3rd_brand==0

* Every rest stop is expected to be matched by now. A rest stop that failed all three
* rounds is left with its 4th-nearest station, whose brand was never compared, so stop
* here rather than silently writing that station to the output.
assert matching_3rd_brand == 1

* now, make sure that we have 1:1 match
* (a station claimed by several rest stops is kept once)
sort StID distance, stable
by StID: keep if _n==1

keep StID

save "$P_Data_Processed/highway_stations.dta", replace
